// Copyright 2024 The Google Research Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package neighborhood;

// For FIXED_LEN, batching will assert that all sequences are exactly
// the same length.
//
// For PAD_MAX_LEN, batching will pad those sequences in the batch that
// are shorter than the longest sequence. Only the pad_dim dimension
// can have inconsistent lengths, other dimensions have to have the
// same length. Padded values are set to pad_value for arithmetic
// types. For string tensors, no initialization is done; for non-float
// arithmetic types, the value is cast.
//
// For PAD_FIXED_LEN, the operation is like PAD_MAX_LEN but all
// sequences are made fixed_len long. A sequence in a batch exceeding
// this length will result in a run-time error.
//
// The fixate_size field is used in conjunction with the fixed length
// options. If set, the input tensor will be reshaped to the
// fixate_size. This is required for TPU operation that needs a fully
// defined tensor size and cannot deal with a dynamic size. Tensors
// produced by the Hanasu Op are reported as having a dynamic size, hence
// an explicit reshape is required in that case.
message PaddedBatchOpts {
  // Batching strategy applied to the sequences of a batch.
  enum PadType {
    // Proto3 zero value, i.e. what pad_type reads as when it is not set.
    // NOTE(review): how the op treats this value is not visible here.
    PADTYPE_UNSPECIFIED = 0;
    // Pad shorter sequences up to the longest sequence in the batch.
    PAD_MAX_LEN = 1;
    // Pad every sequence to fixed_len; a longer sequence is a run-time error.
    PAD_FIXED_LEN = 2;
    // No padding: all sequences must already have exactly the same length.
    FIXED_LEN = 3;
  }
  // Which batching strategy to use.
  PadType pad_type = 1;
  // The only dimension along which sequence lengths may differ; all other
  // dimensions must have the same length.
  int32 pad_dim = 2;
  // Target sequence length for PAD_FIXED_LEN.
  // NOTE(review): presumably ignored for the other pad types -- confirm.
  int32 fixed_len = 3;
  // Value written into padded positions of arithmetic tensors; it is cast
  // for non-float arithmetic types. String tensors are not initialized.
  float pad_value = 4;
  // If set, the input tensor is reshaped to this size. Used together with
  // the fixed length options to obtain a fully defined tensor size, which
  // TPU operation requires.
  repeated int32 fixate_size = 5;
}

// Options for one feature-neighborhood tensor configuration.
// NOTE(review): field semantics below are inferred from names only; the
// consuming op is not visible in this file -- confirm before relying on them.
message FeatureNeighborhoodTensorOpts {
  // Symbol inventory and sentence boundary markers for one side (input or
  // output) of the data.
  message SymbolsAndMarkers {
    // Presumably the marker denoting the start of a sequence -- TODO confirm.
    string start_of_sentence = 1;
    // Presumably the marker denoting the end of a sequence -- TODO confirm.
    string end_of_sentence = 2;
    // Symbol inventory. NOTE(review): whether this is an inline symbol list
    // or a path to a symbol table cannot be told from here.
    string symbols = 3;
  }
  // Symbols and markers for the input side.
  SymbolsAndMarkers input = 1;
  // Symbols and markers for the output side.
  SymbolsAndMarkers output = 2;
  // Presumably whether an end-of-sentence marker is appended -- TODO confirm
  // which sequences this applies to.
  bool append_eos = 3;
  // Presumably an upper bound on spelling sequence length -- TODO confirm.
  int32 max_spelling_len = 4;
  // Presumably an upper bound on pronunciation sequence length -- TODO
  // confirm.
  int32 max_pronunciation_len = 5;
  // Presumably an upper bound on the number of neighbors -- TODO confirm.
  int32 max_neighbors = 6;
  // Padding and batching options; see PaddedBatchOpts.
  PaddedBatchOpts batch_opts = 7;
  // Presumably whether the output string is split into tokens on spaces
  // -- TODO confirm against the op implementation.
  bool split_output_on_space = 8;
}

// Top-level options holding a list of FeatureNeighborhoodTensorOpts.
message FeatureNeighborhoodOpOpts {
  // Zero or more tensor option sets.
  // NOTE(review): presumably one entry per feature-neighborhood input
  // handled by the op -- confirm.
  repeated FeatureNeighborhoodTensorOpts feature_neighborhoods = 1;
}
